library(tidyverse)
library(knitr)
library(plotly) ; library(viridis) ; library(gridExtra) ; library(RColorBrewer)
library(biomaRt)
library(Rtsne)
library(caret) ; library(ROCR) ; library(car)
library(corrplot)
library(expss) ; library(knitr)

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Background


In 20_04_07_LR.html we performed a logistic regression but we discovered that the features were strongly correlated. This inflates the standard error of the coefficients, making them no longer interpretable.

# Clusterings
clustering_selected = 'DynamicHybridMergedSmall'
clusterings = read_csv('./../Data/clusters.csv')
clusterings$Module = clusterings[,clustering_selected] %>% data.frame %>% unlist %>% unname

assigned_module = data.frame('ID' = clusterings$ID, 'Module' = clusterings$Module)

# Original dataset
original_dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'), row.names=1)

# Model dataset
load('./../Data/LR_model.RData')

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org')
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=rownames(dataset), mart=mart)

Variance Inflation Factor (VIF) and correlation plot

# VIF
plot_data = data.frame('Feature' = car::vif(fit) %>% sort %>% names,
                       'VIF' = car::vif(fit) %>% sort %>% unname) %>%
            mutate(outlier = VIF>10)

plot_data %>% ggplot(aes(reorder(Feature, -VIF), VIF, fill = !outlier)) + geom_bar(stat='identity') + scale_y_log10() +
              geom_hline(yintercept = 10, color = 'gray', linetype = 'dashed') + xlab('Model Features') + theme_minimal() +
              theme(legend.position = 'none', axis.text.x = element_text(angle = 90, hjust = 1))

# Correlation plot
corrplot.mixed(cor(train_set[,-ncol(train_set)]), lower = 'number', lower.col = 'gray', number.cex = .6, tl.pos = 'l', tl.col = '#666666')

rm(getinfo, mart, clusterings)

Possible solutions to Multicollinearity:


  1. Remove all variables with a VIF>10: We would lose all but three of our variables, not ideal

  2. Do Principal Component Regression: We would lose the relation between the prediction and the original features, which would be interesting to study

  3. Don’t do anything: Multicollinearity affects the coefficients and p-values of the regression, but it doesn’t affect the predictions, precision of the predictions or the goodness-of-fit statistics ref, but as with the previous option, we cannot study the coefficients of the regression

  4. Use Ridge Regression


Ridge Regression


Using the same train and test sets created in 20_04_07_LR.html


Train model

train_set$SFARI = train_set$SFARI %>% as.factor

lambda_seq = 10^seq(1, -4, by = -.1)

set.seed(123)
fit = train(SFARI ~., data = train_set, method = 'glmnet', trControl = trainControl('cv', number = 10),
              tuneGrid = expand.grid(alpha = 0, lambda = lambda_seq))

cat(paste0('The best model has a lambda of ', round(fit$bestTune$lambda,4)))
## The best model has a lambda of 0.0631
rm(lambda_seq)

Predict labels in test set

predictions = fit %>% predict(test_set, type='prob')

test_set$prob = predictions$`TRUE`
test_set$pred = test_set$prob>0.5

rm(predictions)


Performance metrics


Confusion matrix

conf_mat = test_set %>% apply_labels(SFARI = 'Actual Labels', 
                                     prob = 'Assigned Probability', 
                                     pred = 'Label Prediction')

cro(conf_mat$SFARI, list(conf_mat$pred, total()))
 Label Prediction     #Total 
 FALSE   TRUE   
 Actual Labels 
   FALSE  8031 4226   12257
   TRUE  81 96   177
   #Total cases  8112 4322   12434
rm(conf_mat)

Accuracy

acc = mean(test_set$SFARI==test_set$pred)

cat(paste0('Accuracy = ', round(acc,4)))
## Accuracy = 0.6536
rm(acc)

ROC Curve

pred_ROCR = prediction(test_set$prob, test_set$SFARI)

roc_ROCR = performance(pred_ROCR, measure='tpr', x.measure='fpr')
AUC = performance(pred_ROCR, measure='auc')@y.values[[1]]

plot(roc_ROCR, main=paste0('ROC curve (AUC=',round(AUC,2),')'), col='#009999')
abline(a=0, b=1, col='#666666')

Lift Curve

lift_ROCR = performance(pred_ROCR, measure='lift', x.measure='rpp')
plot(lift_ROCR, main='Lift curve', col='#86b300')

rm(pred_ROCR, roc_ROCR, AUC, lift_ROCR)




Coefficients


  • MTcor has a low coefficient and Gene Significance has a negative coefficient
gene_corr_info = dataset %>% mutate('ID' = rownames(dataset)) %>% dplyr::select(ID, MTcor, SFARI) %>% left_join(assigned_module, by ='ID') %>%
                 mutate(Module = gsub('#','',Module))

coef_info = coef(fit$finalModel, fit$bestTune$lambda) %>% as.matrix %>% data.frame %>% dplyr::rename('coef' = X1) %>%
            mutate('feature' = gsub('MM.','',rownames(.))) %>% left_join(gene_corr_info, by = c('feature' = 'Module')) %>% 
            dplyr::select(feature, coef, MTcor, SFARI) %>% group_by(feature, coef, MTcor) %>% summarise('SFARI_perc' = mean(SFARI)) %>%
            arrange(desc(coef))

kable(coef_info %>% dplyr::select(feature, coef) %>% rename('Feature' = feature, 'Coefficient' = coef),
      align = 'cc', caption = 'Regression Coefficients')
Regression Coefficients
Feature Coefficient
FF62BC 0.8589655
00A7FF 0.6848483
FE6E8A 0.5886960
00BA38 0.5350276
00BF7D 0.4516837
D89000 0.3579370
00BCD8 0.3572140
00C0AF 0.3081397
D376FF 0.2547095
F8766D 0.2214329
39B600 0.2082034
B79F00 0.1676503
F564E3 0.1395635
00C097 0.1129423
00BFC4 0.1073604
FF67A4 0.0935501
E58700 0.0914464
6BB100 0.0578363
MTcor 0.0222060
absGS -0.0331095
B983FF -0.0342090
00BD5F -0.0408827
A3A500 -0.0630330
EF7F49 -0.0715613
GS -0.0996442
00B0F6 -0.1285335
E76BF3 -0.1397291
FD61D1 -0.1460062
8AAB00 -0.1559012
(Intercept) -0.1945565
C99800 -0.2764409
9590FF -0.3914160
00B7E9 -0.3946339
619CFF -0.4782678

  • There is a positive relation between the coefficient assigned to the membership of each module and the percentage of SFARI genes that are assigned to that module
ggplotly(coef_info %>% dplyr::rename('Module' = feature) %>% filter(!is.na(MTcor)) %>%
              ggplot(aes(coef, SFARI_perc)) +  geom_smooth(method = 'lm', color = 'gray', alpha = 0.1) + 
              geom_point(aes(id = Module), color = paste0('#',coef_info$feature[!is.na(coef_info$MTcor)])) + 
              theme_minimal() + xlab('Coefficient') + 
              ylab('% of SFARI genes in Module'))


  • There is a really small negative relation between the coefficient and the correlation of the module and the diagnosis.

This is not a surprise since we knew that there was a negative relation between SFARI genes and Module-Diagnosis correlation from Preprocessing/Gandal/AllRegions/RMarkdowns/20_04_03_WGCNA_modules_EA.html. The fact that the relation between coefficient and Module-Diagnosis correlation is so small now could even be a good sign that the model is picking some biological signal as well as the SFARI patterns (since the relation with the biological signals is positive)

ggplotly(coef_info %>% dplyr::rename('Module' = feature) %>% filter(!is.na(MTcor)) %>%
              ggplot(aes(coef, MTcor)) +  geom_smooth(method = 'lm', color = 'gray', alpha = 0.1) + 
              geom_point(aes(id = Module), color = paste0('#',coef_info$feature[!is.na(coef_info$MTcor)])) + 
              theme_minimal() + xlab('Coefficient') + 
              ylab('Module-Diagnosis correlation'))




Analyse model


SFARI genes have a slightly higher score distribution than the rest

plot_data = test_set %>% dplyr::select(prob, SFARI)

ggplotly(plot_data %>% ggplot(aes(prob, fill=SFARI, color=SFARI)) + geom_density(alpha=0.3) + xlab('Score') +
         geom_vline(xintercept = mean(plot_data$prob[plot_data$SFARI]), color = '#00C0C2', linetype = 'dashed') +
         geom_vline(xintercept = mean(plot_data$prob[!plot_data$SFARI]), color = '#FF7371', linetype = 'dashed') +
         theme_minimal() + ggtitle('Model score distribution by SFARI Label'))
  • There seems to be a positive relation between the SFARI scores and the probability assigned by the model

  • The number of observations when separating the test set by SFARI score is quite small, so this is not a robust result, specially for scores 1, 2 and 6

plot_data = test_set %>% mutate(ID=rownames(test_set)) %>% dplyr::select(ID, prob) %>%
            left_join(original_dataset %>% mutate(ID=rownames(original_dataset)), by='ID') %>%
            dplyr::select(ID, prob, gene.score) %>% apply_labels(gene.score='SFARI Gene score')

cro(plot_data$gene.score)
 #Total 
 SFARI Gene score 
   1  5
   2  12
   3  38
   4  86
   5  32
   6  4
   None  12257
   #Total cases  12434
mean_vals = plot_data %>% group_by(gene.score) %>% summarise(mean_prob = mean(prob))

ggplotly(plot_data %>% ggplot(aes(gene.score, prob, fill=gene.score)) + geom_boxplot() + 
              scale_fill_manual(values=SFARI_colour_hue(r=c(1:6,8,7))) + 
              ggtitle('Distribution of probabilities by SFARI score') +
              xlab('SFARI score') + ylab('Probability') + theme_minimal())
rm(mean_vals)

Genes with highest scores in test set


  • Considering the class imbalance in the test set (1:69), there are many SFARI scores in here (1:13)

  • 3 of the 5 SFARI genes with a score of 1 in the test set are in this top 50 list

  • There aren’t any SFARI genes with a score lower than 3 (even though 69% of the SFARI genes in the test score have a score lower than 3)

test_set %>% dplyr::select(prob, SFARI) %>% mutate(ID = rownames(test_set)) %>% 
             arrange(desc(prob)) %>% top_n(50, wt=prob) %>%
             left_join(original_dataset %>% mutate(ID=rownames(original_dataset)), by='ID')  %>% 
             left_join(gene_names, by = c('ID'='ensembl_gene_id')) %>%
             dplyr::rename('GeneSymbol' = external_gene_id, 'Probability' = prob, 'ModuleDiagnosis_corr' = MTcor, 'GeneSignificance' = GS) %>%
             mutate(ModuleDiagnosis_corr = round(ModuleDiagnosis_corr,4), Probability = round(Probability,4), 
                    GeneSignificance = round(GeneSignificance,4)) %>%
             dplyr::select(GeneSymbol, GeneSignificance, ModuleDiagnosis_corr, Module, Probability, gene.score) %>%
             kable(caption = 'Genes with highest model probabilities from the test set')
Genes with highest model probabilities from the test set
GeneSymbol GeneSignificance ModuleDiagnosis_corr Module Probability gene.score
RPRD2 -0.0922 -0.6031 #00BA38 0.8552 None
MYCBP2 -0.3975 -0.6031 #00BA38 0.8404 None
GATAD2B -0.4221 -0.6031 #00BA38 0.8316 None
RNF111 -0.2410 0.0586 #FE6E8A 0.8279 None
KMT2D -0.3255 -0.6031 #00BA38 0.8273 None
ARID1B 0.2711 0.1127 #FF62BC 0.8247 1
EP400 -0.1671 -0.6031 #00BA38 0.8212 3
DROSHA -0.4111 -0.6031 #00BA38 0.8183 None
HUWE1 -0.5026 -0.6031 #00BA38 0.8183 None
ATN1 -0.2052 -0.6031 #00BA38 0.8167 None
RFX7 0.1372 0.0586 #FE6E8A 0.8150 None
SMG7 -0.2560 -0.6031 #00BA38 0.8148 None
RASGRF1 -0.7425 -0.9514 #00C0AF 0.8139 None
CLIP3 -0.5368 -0.6031 #00BA38 0.8135 None
KMT2A 0.1347 0.7916 #00C097 0.8127 1
SAP130 -0.2482 -0.6031 #00BA38 0.8110 None
ASAP1 -0.0896 -0.6031 #00BA38 0.8085 None
TLN2 -0.4783 -0.9514 #00C0AF 0.8083 None
UBR4 -0.2856 -0.6031 #00BA38 0.8080 None
FMNL1 -0.2223 -0.6031 #00BA38 0.8073 None
NFIC -0.4414 -0.6031 #00BA38 0.8059 None
AAK1 -0.6913 -0.9514 #00C0AF 0.8030 None
NF1 -0.4123 -0.6031 #00BA38 0.8025 None
MIDN -0.0520 -0.6031 #00BA38 0.8022 None
CACNG3 -0.4689 -0.6031 #00BA38 0.8022 None
EIF4G3 -0.4827 -0.8040 #00B7E9 0.8021 None
UBAP2L -0.3318 -0.6031 #00BA38 0.7994 None
R3HDM2 -0.4078 -0.6031 #00BA38 0.7989 None
HUNK -0.3273 -0.6750 #D376FF 0.7986 None
NAV1 -0.1734 -0.6031 #00BA38 0.7961 None
MAP3K13 -0.4321 -0.6750 #D376FF 0.7954 None
MEF2D -0.4172 -0.6031 #00BA38 0.7946 None
SPTAN1 -0.2903 -0.6031 #00BA38 0.7945 None
POM121C -0.2791 -0.6031 #00BA38 0.7942 None
USP32 -0.6248 -0.6031 #00BA38 0.7935 None
KCNJ6 -0.1379 -0.9514 #00C0AF 0.7932 None
BCL9 0.1299 -0.6031 #00BA38 0.7930 None
CELF2 -0.0605 -0.9514 #00C0AF 0.7913 None
AMPD3 -0.2810 -0.9514 #00C0AF 0.7912 None
SCAF4 0.0185 -0.6031 #00BA38 0.7912 None
FMN2 -0.4335 -0.6031 #00BA38 0.7910 None
NMNAT2 -0.6817 -0.9514 #00C0AF 0.7898 None
BRPF3 -0.4391 -0.6031 #00BA38 0.7895 None
SIK3 -0.1206 -0.0094 #00A7FF 0.7891 None
RAB7A -0.4020 -0.6031 #00BA38 0.7890 None
SGIP1 -0.5363 -0.6031 #00BA38 0.7877 None
FOXJ2 0.2507 0.1127 #FF62BC 0.7877 None
RABGAP1L -0.5226 -0.6031 #00BA38 0.7860 None
KMT2E 0.0723 0.7916 #00C097 0.7858 3
PRPF8 -0.5452 -0.6031 #00BA38 0.7854 None




Negative samples distribution


Selecting the Negative samples in the test set

negative_set = test_set %>% filter(SFARI)
  
  
  
negative_set = dataset %>% filter(!SFARI & !rownames(.) %in% rownames(train_set)) %>% dplyr::select(-SFARI)
rownames(negative_set) = rownames(dataset)[!dataset$SFARI & !rownames(dataset) %in% rownames(train_set)]

predictions = predict(fit, negative_set, type='prob') %>% as.vector
negative_set$prob = predictions$`TRUE`
negative_set$pred = negative_set$prob>0.5

negative_set_table = negative_set %>% apply_labels(prob = 'Assigned Probability', 
                                                   pred = 'Label Prediction')

rm(predictions)
cro(negative_set_table$pred)
 #Total 
 Label Prediction 
   FALSE  8031
   TRUE  4226
   #Total cases  12257
cat(paste0('\n', sum(negative_set$pred), ' genes are predicted as ASD-related'))
## 
## 4226 genes are predicted as ASD-related
negative_set %>% ggplot(aes(prob)) + geom_density(color='#F8766D', fill='#F8766D', alpha=0.5) +
                 geom_vline(xintercept=0.5, color='#333333', linetype='dotted') + 
                 ggtitle('Probability distribution of the Negative samples in the test set') + 
                 theme_minimal()


Probability and Gene Significance


  • There’s a lot of noise, but the probability the model assigns to each gene seems to have a negative relation with the Gene Significance (under-expressed genes having on average the higher probabilities and over-expressed genes the lowest)

  • The pattern is strongest in under-expressed genes

negative_set %>% ggplot(aes(prob, GS, color=MTcor)) + geom_point() + geom_smooth(method='loess', color='#666666') +
                 geom_hline(yintercept=0, color='gray', linetype='dashed') + xlab('Probability') +
                 scale_color_gradientn(colours=c('#F8766D','white','#00BFC4')) + 
                 ggtitle('Relation between Probability and Gene Significance') + theme_minimal()


Probability and Module-Diagnosis correlation


On average, the model seems to be assigning a probability inversely proportional to the Module-Diagnosis correlation of the module, with the highest positively correlated modules having the lowest average probability and the highest negatively correlated modules the highest average probability. But the difference isn’t big

negative_set %>% ggplot(aes(MTcor, prob, color=GS)) + geom_point() + geom_smooth(method='loess', color='#666666') + 
                 geom_hline(yintercept=mean(negative_set$prob), color='gray', linetype='dashed') +
                 scale_color_gradientn(colours=c('#F8766D','#F8766D','white','#00BFC4','#00BFC4')) + 
                 xlab('Modules ordered by their correlation to ASD') + ylab('Model probability') +
                 theme_minimal()

Summarised version, plotting by module instead of by gene

The difference in the trend lines between this plot and the one above is that the one above takes all the points into consideration while this considers each module as an observation by itself, so the top one is strongly affected by big modules and the bottom one treats all modules the same

The model seems to give higher probabilities to genes belonging to modules with a small (absolute) correlation to Diagnosis (this is unexpected)

plot_data = negative_set %>% group_by(MTcor) %>% summarise(mean = mean(prob), sd = sd(prob), n = n()) %>%
            mutate(MTcor_sign = ifelse(MTcor>0, 'Positive', 'Negative')) %>% left_join(original_dataset, by='MTcor') %>%
            dplyr::select(Module, MTcor, MTcor_sign, mean, sd, n) %>% distinct()
colnames(plot_data)[1] = 'ID'

ggplotly(plot_data %>% ggplot(aes(MTcor, mean, size=n, color=MTcor_sign)) + geom_point(aes(id=ID)) + 
         geom_smooth(method='loess', color='gray', se=FALSE) + geom_smooth(method='lm', se=FALSE) + 
         xlab('Module-Diagnosis correlation') + ylab('Mean Probability by Model') + theme_minimal())


Probability and level of expression


There is a positive relation between level of expression and probability, the model seems to be capturing indirectly the level of expression of the genes to make the prediction, so it’s introducing the same bias

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame
mean_and_sd = data.frame(ID=rownames(datExpr), meanExpr=rowMeans(datExpr), sdExpr=apply(datExpr,1,sd))

plot_data = negative_set %>% mutate(ID=rownames(negative_set)) %>% left_join(mean_and_sd, by='ID') %>% 
            left_join(original_dataset %>% mutate(ID=rownames(original_dataset)) %>% 
                      dplyr::select(ID, Module), by='ID')
colnames(plot_data)[ncol(plot_data)] = 'Module'

plot_data %>% ggplot(aes(meanExpr, prob)) + geom_point(alpha=0.2, color='#0099cc') + 
              geom_smooth(method='loess', color='gray', alpha=0.3) + 
              geom_smooth(method='lm', color='#999999', se=FALSE, alpha=1) + 
              theme_minimal() + ggtitle('Mean expression vs model probability by gene')

rm(mean_and_sd)
plot_data2 = plot_data %>% group_by(Module) %>% summarise(meanExpr = mean(meanExpr), meanProb = mean(prob), n=n())

ggplotly(plot_data2 %>% ggplot(aes(meanExpr, meanProb, size=n)) + geom_point(color=plot_data2$Module) + 
         geom_smooth(method='loess', se=TRUE, color='gray', alpha=0.1, size=0.7) + 
         geom_smooth(method='lm', se=FALSE, color='gray') + theme_minimal() + theme(legend.position='none') + 
         ggtitle('Mean expression vs model probability by Module'))
rm(plot_data2)


Probability and lfc


There is a relation between probability and lfc, so it **IS*() capturing a bit of true information (because lfc and mean expression were negatively correlated and it still has a positive relation in the model)

  • The relation is stronger in genes under-expressed in ASD
plot_data = negative_set %>% mutate(ID=rownames(negative_set)) %>% 
            left_join(DE_info %>% mutate(ID=rownames(DE_info)), by='ID')

plot_data %>% ggplot(aes(log2FoldChange, prob)) + geom_point(alpha=0.1, color='#0099cc') + 
              geom_smooth(method='loess', color='gray', alpha=0.3) + 
              theme_minimal() + ggtitle('lfc vs model probability by gene')

  • The relation is stronger in Differentially Expressed genes
plot_data %>% mutate(DE = padj<0.05) %>% ggplot(aes(log2FoldChange, prob, color=DE)) + geom_point(alpha=0.1) + 
              geom_smooth(method='loess', alpha=0.3) + 
              theme_minimal() + ggtitle('lfc vs model probability by gene')



Conclusion


The model is capturing the mean level of expression of the genes (indirectly through module memberhsip), which is a strong bias found in the SFARI scores, but it seems to be capturing a bit of true biological signal as well (based on the GS and the log fold change plots)


Saving results

save(train_set, test_set, negative_set, fit, dataset, file='./../Data/Ridge_model.RData')




Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] expss_0.10.2       corrplot_0.84      car_3.0-7          carData_3.0-3     
##  [5] ROCR_1.0-7         gplots_3.0.3       caret_6.0-86       lattice_0.20-40   
##  [9] Rtsne_0.15         biomaRt_2.40.5     RColorBrewer_1.1-2 gridExtra_2.3     
## [13] viridis_0.5.1      viridisLite_0.3.0  plotly_4.9.2       knitr_1.28        
## [17] forcats_0.5.0      stringr_1.4.0      dplyr_0.8.5        purrr_0.3.3       
## [21] readr_1.3.1        tidyr_1.0.2        tibble_3.0.0       ggplot2_3.3.0     
## [25] tidyverse_1.3.0   
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                backports_1.1.5            
##   [3] Hmisc_4.4-0                 plyr_1.8.6                 
##   [5] lazyeval_0.2.2              splines_3.6.3              
##   [7] BiocParallel_1.18.1         crosstalk_1.1.0.1          
##   [9] GenomeInfoDb_1.20.0         digest_0.6.25              
##  [11] foreach_1.5.0               htmltools_0.4.0            
##  [13] gdata_2.18.0                fansi_0.4.1                
##  [15] magrittr_1.5                checkmate_2.0.0            
##  [17] memoise_1.1.0               cluster_2.1.0              
##  [19] openxlsx_4.1.4              annotate_1.62.0            
##  [21] recipes_0.1.10              modelr_0.1.6               
##  [23] gower_0.2.1                 matrixStats_0.56.0         
##  [25] prettyunits_1.1.1           jpeg_0.1-8.1               
##  [27] colorspace_1.4-1            blob_1.2.1                 
##  [29] rvest_0.3.5                 haven_2.2.0                
##  [31] xfun_0.12                   crayon_1.3.4               
##  [33] RCurl_1.98-1.1              jsonlite_1.6.1             
##  [35] genefilter_1.66.0           survival_3.1-11            
##  [37] iterators_1.0.12            glue_1.3.2                 
##  [39] gtable_0.3.0                zlibbioc_1.30.0            
##  [41] ipred_0.9-9                 XVector_0.24.0             
##  [43] DelayedArray_0.10.0         shape_1.4.4                
##  [45] BiocGenerics_0.30.0         abind_1.4-5                
##  [47] scales_1.1.0                DBI_1.1.0                  
##  [49] Rcpp_1.0.4                  xtable_1.8-4               
##  [51] progress_1.2.2              htmlTable_1.13.3           
##  [53] foreign_0.8-75              bit_1.1-15.2               
##  [55] Formula_1.2-3               stats4_3.6.3               
##  [57] lava_1.6.7                  prodlim_2019.11.13         
##  [59] glmnet_3.0-2                htmlwidgets_1.5.1          
##  [61] httr_1.4.1                  acepack_1.4.1              
##  [63] ellipsis_0.3.0              pkgconfig_2.0.3            
##  [65] XML_3.99-0.3                farver_2.0.3               
##  [67] nnet_7.3-13                 dbplyr_1.4.2               
##  [69] locfit_1.5-9.4              tidyselect_1.0.0           
##  [71] labeling_0.3                rlang_0.4.5                
##  [73] reshape2_1.4.3              AnnotationDbi_1.46.1       
##  [75] munsell_0.5.0               cellranger_1.1.0           
##  [77] tools_3.6.3                 cli_2.0.2                  
##  [79] generics_0.0.2              RSQLite_2.2.0              
##  [81] broom_0.5.5                 evaluate_0.14              
##  [83] yaml_2.2.1                  ModelMetrics_1.2.2.2       
##  [85] bit64_0.9-7                 fs_1.4.0                   
##  [87] zip_2.0.4                   caTools_1.18.0             
##  [89] nlme_3.1-144                xml2_1.2.5                 
##  [91] compiler_3.6.3              rstudioapi_0.11            
##  [93] png_0.1-7                   curl_4.3                   
##  [95] e1071_1.7-3                 reprex_0.3.0               
##  [97] geneplotter_1.62.0          stringi_1.4.6              
##  [99] highr_0.8                   Matrix_1.2-18              
## [101] vctrs_0.2.4                 pillar_1.4.3               
## [103] lifecycle_0.2.0             data.table_1.12.8          
## [105] bitops_1.0-6                GenomicRanges_1.36.1       
## [107] latticeExtra_0.6-29         R6_2.4.1                   
## [109] KernSmooth_2.23-16          rio_0.5.16                 
## [111] IRanges_2.18.3              codetools_0.2-16           
## [113] MASS_7.3-51.5               gtools_3.8.2               
## [115] assertthat_0.2.1            SummarizedExperiment_1.14.1
## [117] DESeq2_1.24.0               withr_2.1.2                
## [119] S4Vectors_0.22.1            GenomeInfoDbData_1.2.1     
## [121] mgcv_1.8-31                 parallel_3.6.3             
## [123] hms_0.5.3                   grid_3.6.3                 
## [125] rpart_4.1-15                timeDate_3043.102          
## [127] class_7.3-16                rmarkdown_2.1              
## [129] pROC_1.16.2                 base64enc_0.1-3            
## [131] Biobase_2.44.0              lubridate_1.7.4